# -*- coding:utf8 -*-
from numpy import *
import sys

def split_train_txt_to_two_parts(
        src_path='C:\\Users\\wuxiaomin\\Desktop\\data\\SinaWeiBo\\weibo_train_data.txt',
        train_path="C:\\Users\\wuxiaomin\\Desktop\\data\\SinaWeiBo\\offline\\offline_train.txt",
        test_path="C:\\Users\\wuxiaomin\\Desktop\\data\\SinaWeiBo\\offline\\offline_test.txt"):
    """Split a tab-separated record file into per-user train and test files.

    Every non-blank line of ``src_path`` is split on tabs; the first field is
    used as the grouping key (the "user") and only the first six fields of the
    line are kept. For each user with ``n`` records, the first
    ``(n // 4) * 3`` records (in file order) are written to ``train_path``
    and the remaining ones to ``test_path``. A user with fewer than four
    records therefore ends up entirely in the test file.

    Users are written in the order in which they first appear in the input,
    so the output is deterministic. A progress counter is written to stdout
    while saving.

    Args:
        src_path: UTF-8 encoded, tab-separated input file.
        train_path: Output file receiving the leading part of each user's
            records. Overwritten if it exists.
        test_path: Output file receiving the trailing part of each user's
            records. Overwritten if it exists.

    Raises:
        IOError / OSError: if any of the files cannot be opened.
        UnicodeDecodeError: if the input is not valid UTF-8.
    """
    # Function-scope imports keep the block self-contained; ``io.open`` gives
    # the same encoding-aware behaviour on both Python 2 and Python 3.
    import io
    from collections import OrderedDict

    print("splitting...")

    # user key -> list of already formatted output lines, in input order.
    rows_by_user = OrderedDict()
    # newline="\n" makes "\n" the only line terminator, so a stray "\r"
    # inside a record cannot break it into two lines.
    with io.open(src_path, "r", encoding="utf-8", newline="\n") as source:
        for line in source:
            # Drop the line ending up front so that short lines (fewer than
            # seven fields) do not carry their own newline into the output.
            line = line.rstrip("\r\n")
            if not line:
                continue
            fields = line.split("\t", 6)
            rows_by_user.setdefault(fields[0], []).append(
                "\t".join(fields[0:6]) + "\n")

    # ``with`` guarantees both outputs are flushed and closed, even on error.
    with io.open(train_path, "w", encoding="utf-8") as f_train, \
            io.open(test_path, "w", encoding="utf-8") as f_test:
        for count, rows in enumerate(rows_by_user.values(), 1):
            sys.stdout.write("\r\tnum." + str(count) + " user is saving")
            sys.stdout.flush()
            # Floor division keeps the cut an integer on Python 3 as well.
            cut = (len(rows) // 4) * 3
            f_train.writelines(rows[:cut])
            f_test.writelines(rows[cut:])

# Script entry point: perform the split only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    split_train_txt_to_two_parts()


